import pandas as pd
import matplotlib.pyplot as plt
from pylab import *
import numpy as np

# Raise pandas' console display limits so printed frames are shown in full
# (columns, total width and per-cell width) instead of being truncated.
pd.options.display.max_columns = 1000
pd.options.display.width = 1000
pd.options.display.max_colwidth = 1000

# Load the passenger table; the path is relative to the working directory.
titanic = pd.read_csv('train.csv')
# Fill missing ages with the column median. (The original note called this
# the "average" and quoted 28, but the value actually used is the median.)
titanic['Age'] = titanic['Age'].fillna(titanic['Age'].median())

# Non-numeric columns cannot be consumed by the later analysis, so the text
# categories are encoded as integers. Series.map rebuilds each column in one
# pass and produces a numeric dtype; writing 0/1 through boolean masks would
# instead leave an object-dtype column that merely contains Python ints.
# NOTE: a value that is not a key of the mapping becomes NaN.
titanic['Sex'] = titanic['Sex'].map({'male': 0, 'female': 1})

# Missing embarkation ports default to 'S'; then S/C/Q are encoded as 0/1/2.
titanic['Embarked'] = titanic['Embarked'].fillna('S').map({'S': 0, 'C': 1, 'Q': 2})

# Start of the analysis: kernel-density estimate of passenger age, one curve
# per passenger class.
#
# SimHei is selected so the Chinese labels below can be rendered. rcParams is
# reached through the explicit `plt` import rather than the `mpl` name that
# only exists because of `from pylab import *`; both refer to the same
# settings object.
plt.rcParams['font.sans-serif'] = ['SimHei']
# A KDE curve extends past the observed data range, so the x axis gets
# negative ticks. Use the ASCII hyphen for them: the Unicode minus sign is
# not drawn correctly when a CJK font such as SimHei is active.
plt.rcParams['axes.unicode_minus'] = False

# Select with a single .loc call (row mask, column) instead of chained
# indexing; the curves are drawn in class order 1, 2, 3 so that they line up
# with the positional legend labels below.
titanic.loc[titanic['Pclass'] == 1, 'Age'].plot(kind='kde')
titanic.loc[titanic['Pclass'] == 2, 'Age'].plot(kind='kde')
titanic.loc[titanic['Pclass'] == 3, 'Age'].plot(kind='kde')
plt.xlabel(u"年龄")  # x-axis label: "age"
plt.ylabel(u"密度")  # y-axis label: "density"
plt.title(u"各等级的乘客年龄分布")  # title: "passenger age distribution by class"
# Legend entries: "first class", "2nd class", "3rd class".
plt.legend((u'头等舱', u'2等舱',u'3等舱'),loc='best')
plt.show()